require(tidyverse)
# cleanProjidsInDataTable
#
# = input =
# datatable: two-dimensional table, such as imported by tidyverse::read_csv
# projid_columns: vector of column names, each of which should appear exactly once in datatable
#
# = output =
# a copy of the datatable where the projid_columns have each been cleaned
# into a vector of character strings exactly eight digits
cleanProjidsInDataTable = function(datatable, projid_columns) {
copytable = data.table::copy(datatable)
for (column in projid_columns) {
selection = select(datatable, all_of(column))
if (length(selection) == 1) {
copytable[[column]] = cleanProjidsInVector(selection[[column]])
} else {
stop(paste("Unique '", column, "' column not found in datatable.", sep="", collapse=NULL))
}
}
return(copytable)
}
cleanProjidsInVector = function(values) {
if (is.integer(values)) {
return(sapply(values, cleanIntegerProjid))
} else if (is.double(values)) {
return(sapply(values, cleanDoubleProjid))
} else if (is.character(values)) {
return(sapply(values, cleanStringProjid))
} else {
stop("Invalid vector type for cleaning projids.")
}
}
cleanIntegerProjid = function(value) {
if (is.integer(value)) {
if (is.na(value)) {
return(NA)
} else if (value >= 0 && value <= 99999999) {
string = as.character(value)
zeroes = createLeadingZeroes(as.integer(8)-nchar(string))
return(paste(zeroes, string, sep="", collapse=""))
} else {
stopCleaningProjid(value)
}
} else {
stop("Attempted to clean non-integer value as integer projid.")
}
}
createLeadingZeroes = function(count) {
if (is.integer(count) && count >= 0) {
return(paste(rep("0", count), collapse=""))
} else {
stop(paste("Invalid count of leading zeroes: ", count, sep="", collapse=NULL))
}
}
cleanDoubleProjid = function(value) {
if (is.double(value)) {
if (is.na(value)) {
return(NA)
} else if (value == as.integer(value)) {
return(cleanIntegerProjid(as.integer(value)))
} else {
stopCleaningProjid(value)
}
} else {
stop("Attempted to clean non-double value as double projid.")
}
}
cleanStringProjid = function(value) {
if (is.character(value)) {
if (is.na(value)) {
return(NA)
} else if (str_detect(value, "^\\d{1,8}$")) {
return(cleanIntegerProjid(as.integer(value)))
} else {
stopCleaningProjid(value)
}
} else {
stop("Attempted to clean non-character value as character projid.")
}
}
stopCleaningProjid = function(projid) {
stop(paste("Invalid projid value for cleaning: ", value, sep="", collapse=NULL))
}resource_dir = "/pastel/resources/20220203_snRNAseq_AMPAD/"
annotation = readRDS(paste0(resource_dir,"updated_annotations/annotations.rds")) # 1638882 rows
annotation = cleanProjidsInDataTable(annotation,"projid")
load(paste0(resource_dir, "updated_annotations/celltype_exp.RData"))
annotation.filt = annotation[annotation$projid %in% pheno.filt$projid,]
# pheno.filtlength(unique(pheno.filt$projid))## [1] 424
length(unique(annotation.filt$projid))## [1] 424
Final consensus cognitive diagnosis: cogdx. Clinical consensus diagnosis of cognitive status at time of death.
counts_cogdx = as.data.frame(table(pheno.filt$cogdx))
colnames(counts_cogdx) = c("cogdx", "frequency")
counts_cogdx$cogdx_description = c("1 NCI: No cognitive impairment",
"2 MCI: Mild cognitive impairment (One impaired domain) and NO other cause of CI",
"3 MCI: Mild cognitive impairment (One impaired domain) AND another cause of CI",
"4 AD: Alzheimer’s dementia and NO other cause of CI",
"5 AD: Alzheimer’s dementia AND another cause of CI",
"6 Other dementia: Other primary cause of dementia")
createDT(counts_cogdx)createDT(as.tibble(unique(annotation.filt[, c("cell.type", "class")])))## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
What is the total number of cells by donor?
length(unique(annotation.filt$projid))## [1] 424
cell_perc <- annotation.filt %>% group_by(projid, cell.type) %>% summarise(n_cells = n()) %>% ungroup() %>% group_by(projid) %>% mutate(perc_cells = 100*(n_cells/sum(n_cells)))
cell_perc_wide <- cell_perc %>% select(projid,cell.type,perc_cells) %>% pivot_wider(values_from = perc_cells, names_from = cell.type) # cell proportions by donor
write.table(cell_perc_wide, file = paste0(resource_dir, "updated_annotations/cellprop_snRNAseq.txt"), quote = F, row.names = F, sep = "\t")
cellsBydonor = annotation.filt %>% group_by(projid) %>% summarise(n_cells = n()) %>% arrange(n_cells)
createDT(cellsBydonor)cell_perc$projid = as.character(cell_perc$projid)
cell_perc_diag = cell_perc %>% left_join(pheno.filt[, c("projid", "cogdx")] )
cell_perc_diag$cogdx_group = as.character(cell_perc_diag$cogdx)
cell_perc_diag$cogdx_group[cell_perc_diag$cogdx_group %in% c("1")] <- "NCI" # Normal cognition
cell_perc_diag$cogdx_group[cell_perc_diag$cogdx_group %in% c("2", "3")] <- "MCI"
cell_perc_diag$cogdx_group[cell_perc_diag$cogdx_group %in% c("4", "5", "6")] <- "AD"
cell_color_map = list(`Excitatory Neurons` = ggsci::pal_d3("category10")(10)[1],
`Inhibitory Neurons` = ggsci::pal_d3("category10")(10)[2],
Oligodendrocytes = ggsci::pal_d3("category10")(10)[3],
Astrocyte = ggsci::pal_d3("category10")(10)[4],
Microglia = ggsci::pal_d3("category10")(10)[5],
OPCs = ggsci::pal_d3("category10")(10)[6],
Endothelial = ggsci::pal_d3("category10")(10)[9],
Other = "gray")
cell_labels = list(ext = "Excitatory Neurons",
inh = "Inhibitory Neurons",
oli = "Oligodendrocytes",
ast = "Astrocyte",
mic = "Microglia",
opc = "OPCs",
end = "Endothelial",
other = "Other")
cell_perc_diag$cell.type_group = cell_perc_diag$cell.type
cell_perc_diag$cell.type_group[! cell_perc_diag$cell.type_group %in% as.character(names(cell_color_map))] <- "Other"
donor_order <- cell_perc_diag %>% filter(cell.type=="Excitatory Neurons") %>% select(projid, perc_cells)
donor_order <- donor_order[order(donor_order$perc_cells, decreasing = T),]
donor_order$n_order = 1:nrow(donor_order)
cell_perc_diag <- cell_perc_diag %>% left_join(donor_order[,c("projid","n_order")])
ggplot(cell_perc_diag, aes(x = reorder(projid, n_order, sum), y=perc_cells, fill=cell.type_group)) +
geom_bar(stat = "identity") +
facet_wrap(~cogdx_group, scales = "free_x") +
scale_fill_manual(values=cell_color_map) +
theme_classic() +
easy_remove_x_axis() +
labs(x = "Donor", y = "% of cells", fill = "Cell type") cell_prop_total = annotation.filt %>% group_by(cell.type) %>% summarise(n_cells = n()) %>% mutate(perc_cells = n_cells/sum(n_cells)) %>%
arrange(-perc_cells) %>% select(cell.type, perc_cells) %>% mutate(perc_cells = scales::percent(perc_cells))
createDT(cell_prop_total)class_prop_total = annotation.filt %>% group_by(class) %>% summarise(n_cells = n()) %>% mutate(perc_cells = n_cells/sum(n_cells)) %>%
arrange(-perc_cells) %>% select(class, perc_cells) %>% mutate(perc_cells = scales::percent(perc_cells))
createDT(class_prop_total)counts_sex = as.data.frame( table(pheno.filt$msex))
colnames(counts_sex) = c("Sex", "Frequency")
counts_sex$Sex = as.character(counts_sex$Sex)
counts_sex$Sex[counts_sex$Sex == 1] = "Male"
counts_sex$Sex[counts_sex$Sex == 0] = "Female"
kable(counts_sex) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))| Sex | Frequency |
|---|---|
| Female | 288 |
| Male | 136 |
Variable: age at death.
message(paste0("Age max: ", max(pheno.filt$age_death)))## Age max: 108.282
message(paste0("Age min: ", min(pheno.filt$age_death)))## Age min: 67.37
ageByDonor = unique(pheno.filt[,c("projid", "age_death", "msex")])
ageByDonor$msex = as.character(ageByDonor$msex)
ageByDonor$msex[ageByDonor$msex == 1] = "Male"
ageByDonor$msex[ageByDonor$msex == 0] = "Female"
mean_f = mean(ageByDonor[ageByDonor$msex == "Female", "age_death"], na.rm = T)
mean_m = mean(ageByDonor[ageByDonor$msex == "Male", "age_death"], na.rm = T)
message(paste0("Age mean female: ", mean_f))## Age mean female: 90.1211701388889
message(paste0("Age mean male: ", mean_m))## Age mean male: 87.2278014705882
ggplot(ageByDonor, aes(x=age_death, fill=msex)) +
geom_histogram(bins = 25, colour='black', position = "stack") +
labs(x="Age", y="Donors") +
# scale_y_continuous(breaks = (1:20)) +
# scale_x_continuous(breaks=seq(20,120,10)) +
geom_vline(xintercept=mean_f, color = "red", linetype="dashed") +
geom_vline(xintercept=mean_m, color = "blue", linetype="dashed") +
theme_classic()cogdx = Clinical consensus diagnosis of cognitive status at time of death.
Variable: cogng_random_slope. Estimated slope from random effects model for global cognition.
# head(pheno.filt[,c("projid", "amyloid", "cogdx", "tangles", "cogng_random_slope")])
ggplot(data = pheno.filt, aes(x=projid, y = cogng_random_slope)) +
geom_point() +
facet_wrap(~cogdx) +
easy_remove_x_axis() +
labs(x = "Individual", y = "Slope for global cognition")Overall amyloid level - Mean of 8 brain regions.
ggplot(data = pheno.filt, aes(x=projid, y = amyloid)) +
geom_point() +
facet_wrap(~cogdx) +
easy_remove_x_axis() +
labs(x = "Individual", y = "Amyloid level")Tangle density - Mean of 8 brain regions.
ggplot(data = pheno.filt, aes(x=projid, y = tangles)) +
geom_point() +
facet_wrap(~cogdx) +
scale_y_continuous(breaks = seq(0, 60, by=10)) +
easy_remove_x_axis() +
labs(x = "Individual", y = "Tangles density") T test for pairwise comparisons.
Anova for global p-value.
pheno.filt$cogdx_3gp_desc = pheno.filt$cogdx_3gp
pheno.filt$cogdx_3gp_desc[pheno.filt$cogdx_3gp_desc == 1] <- "NCI"
pheno.filt$cogdx_3gp_desc[pheno.filt$cogdx_3gp_desc == 2] <- "MCI"
pheno.filt$cogdx_3gp_desc[pheno.filt$cogdx_3gp_desc == 3] <- "AD"
my_comparisons = list(c("NCI", "AD"), c("NCI", "MCI"), c("MCI", "AD"))
p1 <- ggplot(pheno.filt, aes(x = cogdx_3gp_desc, y = cogng_random_slope)) +
geom_boxplot(notch = F, na.rm = T) +
stat_compare_means(comparisons = my_comparisons, method = "t.test")+
stat_compare_means(method = "anova", label.y = 0.4) +
geom_jitter() +
labs(x = "Diagnosis of cognitive status", y = "Slope for global cognition") +
theme_classic()
p2 <- ggplot(pheno.filt, aes(x = cogdx_3gp_desc, y = amyloid)) +
geom_boxplot(notch = F, na.rm = T) +
stat_compare_means(comparisons = my_comparisons, method = "t.test")+
stat_compare_means(method = "anova", label.y = 30) +
geom_jitter() +
labs(x = "Diagnosis of cognitive status", y = "Amyloid levels") +
theme_classic()
p3 <- ggplot(pheno.filt, aes(x = cogdx_3gp_desc, y = tangles)) +
geom_boxplot(notch = F, na.rm = T) +
stat_compare_means(comparisons = my_comparisons, method = "t.test")+
stat_compare_means(method = "anova", label.y = 90) +
geom_jitter() +
labs(x = "Diagnosis of cognitive status", y = "Tangles density") +
theme_classic()
grid.arrange(p1, p2, p3, ncol=3)sessionInfo()## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Stream 8
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] gridExtra_2.3 kableExtra_1.3.4 ggpubr_0.4.0 ggeasy_0.1.3
## [5] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.9 purrr_0.3.4
## [9] readr_2.1.2 tidyr_1.2.0 tibble_3.1.7 ggplot2_3.3.6
## [13] tidyverse_1.3.1
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.3 sass_0.4.1 jsonlite_1.8.0 viridisLite_0.4.0
## [5] carData_3.0-5 modelr_0.1.8 bslib_0.3.1 assertthat_0.2.1
## [9] highr_0.9 cellranger_1.1.0 yaml_2.3.5 pillar_1.7.0
## [13] backports_1.4.1 glue_1.6.2 digest_0.6.29 ggsignif_0.6.3
## [17] rvest_1.0.2 colorspace_2.0-3 htmltools_0.5.2 pkgconfig_2.0.3
## [21] broom_0.8.0 haven_2.5.0 scales_1.2.0 webshot_0.5.3
## [25] svglite_2.1.0 tzdb_0.3.0 generics_0.1.2 farver_2.1.0
## [29] car_3.0-13 ellipsis_0.3.2 DT_0.23 withr_2.5.0
## [33] cli_3.3.0 magrittr_2.0.3 crayon_1.5.1 readxl_1.4.0
## [37] evaluate_0.15 fs_1.5.2 fansi_1.0.3 rstatix_0.7.0
## [41] xml2_1.3.3 tools_4.1.2 data.table_1.14.2 hms_1.1.1
## [45] lifecycle_1.0.1 munsell_0.5.0 reprex_2.0.1 ggsci_2.9
## [49] compiler_4.1.2 jquerylib_0.1.4 systemfonts_1.0.4 rlang_1.0.2
## [53] grid_4.1.2 rstudioapi_0.13 htmlwidgets_1.5.4 crosstalk_1.2.0
## [57] labeling_0.4.2 rmarkdown_2.14 gtable_0.3.0 abind_1.4-5
## [61] DBI_1.1.2 R6_2.5.1 lubridate_1.8.0 knitr_1.39
## [65] fastmap_1.1.0 utf8_1.2.2 stringi_1.7.6 vctrs_0.4.1
## [69] dbplyr_2.1.1 tidyselect_1.1.2 xfun_0.31